library(dplyr)
library(tibble)
library(ggplot2)
library(plotly) #interaktywne wykresy
library(ggcorrplot) #dla macierzy korelacji
library(magrittr)
library(knitr)
library(mlbench)
library(timechange)
library(caret)
library(doParallel)
library(kableExtra) #style kable
library(DT) #interaktywne tabeli
# Fix the RNG seed for reproducibility.
set.seed(7)

# Two-pass CSV load: infer the column classes from a small sample, then read
# the whole file with those classes declared up front.
initial <- read.csv("mp_batteries.csv", nrows = 100)
# vapply() guarantees a character vector with exactly one class per column.
classes <- vapply(initial, function(column) class(column)[1L], character(1))
# A column holding only whole numbers in the sample may contain decimals
# further down the file; read.csv() would then abort on the "integer" class,
# so such columns are read as "numeric" instead.
classes[classes == "integer"] <- "numeric"
df <- read.csv("mp_batteries.csv",
               colClasses = classes)
| Battery.ID | Battery.Formula | Working.Ion | Formula.Charge | Formula.Discharge | Max.Delta.Volume | Average.Voltage | Gravimetric.Capacity | Volumetric.Capacity | Gravimetric.Energy | Volumetric.Energy | Atomic.Fraction.Charge | Atomic.Fraction.Discharge | Stability.Charge | Stability.Discharge | Steps | Max.Voltage.Step |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| mp-30_Al | Al0-2Cu | Al | Cu | Al2Cu | 3.0433992 | 0.0890331 | 1368.48055 | 5562.7901 | 121.840086 | 495.272533 | 0.0000000 | 0.6666667 | 0.0000000 | 0.0000000 | 1 | 0 |
| mp-1022721_Al | Al1-3Cu | Al | AlCu | Al3Cu | 1.2436528 | -0.0215863 | 1112.93655 | 4418.9798 | -24.024232 | -95.389622 | 0.5000000 | 0.7500000 | 0.0740612 | 0.0962458 | 1 | 0 |
| mp-8637_Al | Al0-5Mo | Al | Mo | Al5Mo | 4.7625743 | 0.1227568 | 1741.50416 | 7175.7017 | 213.781556 | 880.866507 | 0.0000000 | 0.8333333 | 0.4114601 | 0.0452120 | 1 | 0 |
| mp-129_Al | Al0-12Mo | Al | Mo | Al12Mo | 12.7238931 | 0.0431214 | 2298.81076 | 7346.2323 | 99.128013 | 316.780060 | 0.0000000 | 0.9230769 | 0.0000000 | 0.0114456 | 1 | 0 |
| mp-91_Al | Al0-12W | Al | W | Al12W | 12.4945977 | 0.0292342 | 1900.74513 | 7332.7186 | 55.566774 | 214.366205 | 0.0000000 | 0.9230769 | 0.0000000 | 0.0000000 | 1 | 0 |
| mp-1055908_Al | Al0-12Mn | Al | Mn | MnAl12 | 18.2361563 | 0.0397314 | 2547.69280 | 7592.9161 | 101.223298 | 301.676876 | 0.0000000 | 0.9230769 | 0.1454643 | 0.0000000 | 1 | 0 |
| mp-2658_Al | Al0-1Fe | Al | Fe | AlFe | 0.7711539 | 0.4717287 | 970.75702 | 5622.3562 | 457.933974 | 2652.226958 | 0.0000000 | 0.5000000 | 0.7613994 | 0.0000000 | 1 | 0 |
| mp-16722_Al | Al1-10.25V | Al | Al10V | Al41V4 | 0.0027108 | -0.0155827 | 61.37701 | 176.4151 | -0.956421 | -2.749028 | 0.9090909 | 0.9111111 | 0.0118097 | 0.0125861 | 1 | 0 |
| mp-998981_Al | Al1-3Ti | Al | TiAl | TiAl3 | 0.9562924 | 0.1602450 | 1248.40362 | 4248.4211 | 200.050419 | 680.788169 | 0.5000000 | 0.7500000 | 0.1415912 | 0.0244962 | 1 | 0 |
| mp-8633_K | K0-3Cr | K | Cr | K3Cr | 15.8029363 | -0.7487069 | 474.94813 | 667.5593 | -355.596958 | -499.806269 | 0.0000000 | 0.7500000 | 0.4025263 | 0.6621618 | 1 | 0 |
# Flag the rows of df that have no missing values, then count both groups.
has_all_values <- complete.cases(df)
complete_rows <- sum(has_all_values)
rows_with_NA <- sum(!has_all_values)
# Print the number of rows containing at least one NA.
rows_with_NA
## [1] 0
| Battery.ID | Battery.Formula | Working.Ion | Formula.Charge | Formula.Discharge | Max.Delta.Volume | Average.Voltage | Gravimetric.Capacity | Volumetric.Capacity | Gravimetric.Energy | Volumetric.Energy | Atomic.Fraction.Charge | Atomic.Fraction.Discharge | Stability.Charge | Stability.Discharge | Steps | Max.Voltage.Step | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Length:4351 | Length:4351 | Length:4351 | Length:4351 | Length:4351 | Min. : 0.00002 | Min. :-7.755 | Min. : 5.176 | Min. : 24.08 | Min. :-583.5 | Min. :-2208.1 | Min. :0.00000 | Min. :0.007407 | Min. :0.00000 | Min. :0.00000 | Min. :1.000 | Min. : 0.0000 | |
| Class :character | Class :character | Class :character | Class :character | Class :character | 1st Qu.: 0.01747 | 1st Qu.: 2.226 | 1st Qu.: 88.108 | 1st Qu.: 311.62 | 1st Qu.: 211.7 | 1st Qu.: 821.6 | 1st Qu.:0.00000 | 1st Qu.:0.086957 | 1st Qu.:0.03301 | 1st Qu.:0.01952 | 1st Qu.:1.000 | 1st Qu.: 0.0000 | |
| Mode :character | Mode :character | Mode :character | Mode :character | Mode :character | Median : 0.04203 | Median : 3.301 | Median : 130.691 | Median : 507.03 | Median : 401.8 | Median : 1463.8 | Median :0.00000 | Median :0.142857 | Median :0.07319 | Median :0.04878 | Median :1.000 | Median : 0.0000 | |
| NA | NA | NA | NA | NA | Mean : 0.37531 | Mean : 3.083 | Mean : 158.291 | Mean : 610.62 | Mean : 444.1 | Mean : 1664.0 | Mean :0.03986 | Mean :0.159077 | Mean :0.14257 | Mean :0.12207 | Mean :1.167 | Mean : 0.1503 | |
| NA | NA | NA | NA | NA | 3rd Qu.: 0.08595 | 3rd Qu.: 4.019 | 3rd Qu.: 187.600 | 3rd Qu.: 722.75 | 3rd Qu.: 614.4 | 3rd Qu.: 2252.3 | 3rd Qu.:0.04762 | 3rd Qu.:0.200000 | 3rd Qu.:0.13160 | 3rd Qu.:0.09299 | 3rd Qu.:1.000 | 3rd Qu.: 0.0000 | |
| NA | NA | NA | NA | NA | Max. :293.19322 | Max. :54.569 | Max. :2557.627 | Max. :7619.19 | Max. :5926.9 | Max. :18305.9 | Max. :0.90909 | Max. :0.993333 | Max. :6.48710 | Max. :6.27781 | Max. :6.000 | Max. :26.9607 | |
| Working Ion | Count |
|---|---|
| Li | 2440 |
| Ca | 435 |
| Mg | 423 |
| Zn | 366 |
| Na | 309 |
| K | 107 |
| Al | 95 |
| Y | 93 |
| Rb | 50 |
| Cs | 33 |
Materiał baterii w stanie naładowania najczęściej występuje w postaci tlenków, tzn. związków chemicznych zbudowanych z tlenu i innego pierwiastka chemicznego.
Materiał baterii w stanie rozładowania najczęściej zawiera Li,
bo właśnie on najczęściej jest głównym jonem.
Wytrenowano model Random Forest do przewidywania atrybutu Working.Ion na podstawie pozostałych atrybutów, z wykorzystaniem 10-krotnej walidacji krzyżowej. Na tej podstawie wyznaczono ważność atrybutów numerycznych zbioru.
Źródła: https://www.geeksforgeeks.org/difference-between-varimp-caret-and-importance-randomforest-for-random-forest-in-r/ https://machinelearningmastery.com/feature-selection-with-the-caret-r-package/ https://topepo.github.io/caret/parallel-processing.html
Korelację pomiędzy wszystkimi atrybutami numerycznymi zbioru przedstawiono na macierzach korelacji. Zastosowano dwie metody – Pearsona oraz Spearmana. Metoda Pearsona pozwala określić, czy między dwiema zmiennymi istnieje związek liniowy. Natomiast metoda Spearmana służy do pomiaru monotonicznej zależności między dwiema zmiennymi. Może ona dawać bardziej wiarygodne wyniki, ponieważ jest mniej wrażliwa na wartości ekstremalne niż współczynnik korelacji Pearsona.
Na podstawie tych obliczeń można zauważyć, że pary atrybutów Gravimetric.Energy - Volumetric.Energy, Gravimetric.Capacity - Volumetric.Capacity, Average.Voltage - Gravimetric.Energy, Average.Voltage - Volumetric.Energy oraz Atomic.Fraction.Charge - Atomic.Fraction.Discharge są silnie skorelowane przy zastosowaniu obu metod.
Źródła: https://www.sthda.com/english/wiki/correlation-test-between-two-variables-in-r https://www.sthda.com/english/wiki/ggcorrplot-visualization-of-a-correlation-matrix-using-ggplot2 https://www.ibm.com/docs/pl/spss-statistics/saas?topic=correlations-power-analysis-one-sample-spearman-correlation-test https://www.analytixlabs.co.in/blog/spearman-vs-pearson-correlation/